{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Collaborative Filtering using SVD++ "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Load the relevant packages**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from surprise import SVDpp\n",
    "from surprise import Dataset\n",
    "from surprise.model_selection import cross_validate\n",
    "from surprise.model_selection import train_test_split\n",
    "from surprise import accuracy\n",
    "\n",
    "\n",
    "# Use the famous SVD algorithm.\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "** Process the data**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the movie lens 10k data and split the data into train test files(80:20)\n",
    "data = Dataset.load_builtin('ml-100k')\n",
    "trainset, testset = train_test_split(data, test_size=.2)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "** Perform 5 fold cv on the Entire data **"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluating RMSE, MAE of algorithm SVDpp on 5 split(s).\n",
      "\n",
      "                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     \n",
      "RMSE (testset)    0.9196  0.9051  0.9037  0.9066  0.9151  0.9100  0.0062  \n",
      "MAE (testset)     0.7273  0.7169  0.7115  0.7143  0.7228  0.7186  0.0058  \n",
      "Fit time          374.57  374.58  369.74  385.44  382.36  377.34  5.72    \n",
      "Test time         2.53    2.63    2.74    2.79    2.84    2.71    0.11    \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'fit_time': (374.5723247528076,\n",
       "  374.5811743736267,\n",
       "  369.7436046600342,\n",
       "  385.44033312797546,\n",
       "  382.3610692024231),\n",
       " 'test_mae': array([0.72730758, 0.71690884, 0.71145647, 0.71432897, 0.7228267 ]),\n",
       " 'test_rmse': array([0.91959167, 0.9051455 , 0.9037419 , 0.9065999 , 0.9150775 ]),\n",
       " 'test_time': (2.534454584121704,\n",
       "  2.626168727874756,\n",
       "  2.738757610321045,\n",
       "  2.7929108142852783,\n",
       "  2.8424782752990723)}"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Perform 5 fold cross validation with all data \n",
    "algo = SVDpp(n_factors=40, n_epochs=40, lr_all=0.008, reg_all=0.1)\n",
    "# Run 5-fold cross-validation and print results.\n",
    "cross_validate(algo,data, measures=['RMSE', 'MAE'], cv=5, verbose=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "** Train Model with 80 % data**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x7f094972b9e8>"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Train the Model\n",
    "model = SVDpp(n_factors=40, n_epochs=10, lr_all=0.008, reg_all=0.1)\n",
    "model.fit(trainset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "** Validate the Model on the hold out 20% data**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SVD++ results on the Test Set\n",
      "RMSE: 0.9320\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0.93200005549325"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#validate the model\n",
    "pred = model.test(testset)\n",
    "print(\"SVD++ results on the Test Set\")\n",
    "accuracy.rmse(pred, verbose=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
